Load required packages.
require(phyloseq)
Loading required package: phyloseq
require(parallelDist)
Loading required package: parallelDist
Warning: package ‘parallelDist’ was built under R version 4.1.2
require(ggplot2)
Loading required package: ggplot2
Warning: package ‘ggplot2’ was built under R version 4.1.2
require(randomForest)
Loading required package: randomForest
Warning: package ‘randomForest’ was built under R version 4.1.2randomForest 4.7-1.1
Type rfNews() to see new features/changes/bug fixes.
Attaching package: ‘randomForest’
The following object is masked from ‘package:ggplot2’:
margin
require(Boruta)
Loading required package: Boruta
Warning: package ‘Boruta’ was built under R version 4.1.2
require(phyloseq)
require(stats)
require(dplyr)
Loading required package: dplyr
Warning: package ‘dplyr’ was built under R version 4.1.2
Attaching package: ‘dplyr’
The following object is masked from ‘package:randomForest’:
combine
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
require(readr)
Loading required package: readr
Warning: package ‘readr’ was built under R version 4.1.2
require(pheatmap)
Loading required package: pheatmap
require(tidyverse)
Loading required package: tidyverse
Warning: package ‘tidyverse’ was built under R version 4.1.2Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.2 ──✔ tibble 3.1.8 ✔ stringr 1.5.0
✔ tidyr 1.2.1 ✔ forcats 0.5.2
✔ purrr 0.3.5 Warning: package ‘tibble’ was built under R version 4.1.2Warning: package ‘tidyr’ was built under R version 4.1.2Warning: package ‘purrr’ was built under R version 4.1.2Warning: package ‘stringr’ was built under R version 4.1.2Warning: package ‘forcats’ was built under R version 4.1.2── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::combine() masks randomForest::combine()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
✖ randomForest::margin() masks ggplot2::margin()
require(micrUBIfuns)
Loading required package: micrUBIfuns
Loading required package: data.table
Warning: package ‘data.table’ was built under R version 4.1.2data.table 1.14.6 using 1 threads (see ?getDTthreads). Latest news: r-datatable.com
**********
This installation of data.table has not detected OpenMP support. It should still work but in single-threaded mode.
This is a Mac. Please read https://mac.r-project.org/openmp/. Please engage with Apple and ask them for support. Check r-datatable.com for updates, and our Mac instructions here: https://github.com/Rdatatable/data.table/wiki/Installation. After several years of many reports of installation problems on Mac, it's time to gingerly point out that there have been no similar problems on Windows or Linux.
**********
Attaching package: ‘data.table’
The following object is masked from ‘package:purrr’:
transpose
The following objects are masked from ‘package:dplyr’:
between, first, last
Loading required package: ggsci
Loading required package: ComplexHeatmap
Loading required package: grid
========================================
ComplexHeatmap version 2.8.0
Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
Github page: https://github.com/jokergoo/ComplexHeatmap
Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
If you use it in published research, please cite:
Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
genomic data. Bioinformatics 2016.
The new InteractiveComplexHeatmap package can directly export static
complex heatmaps into an interactive Shiny app with zero effort. Have a try!
This message can be suppressed by:
suppressPackageStartupMessages(library(ComplexHeatmap))
========================================
! pheatmap() has been masked by ComplexHeatmap::pheatmap(). Most of the arguments
in the original pheatmap() are identically supported in the new function. You
can still use the original function by explicitly calling pheatmap::pheatmap().
Attaching package: ‘ComplexHeatmap’
The following object is masked from ‘package:pheatmap’:
pheatmap
Loading required package: vegan
Warning: package ‘vegan’ was built under R version 4.1.2Loading required package: permute
Warning: package ‘permute’ was built under R version 4.1.2Loading required package: lattice
This is vegan 2.6-4
require(circlize)
Loading required package: circlize
Warning: package ‘circlize’ was built under R version 4.1.2========================================
circlize version 0.4.15
CRAN page: https://cran.r-project.org/package=circlize
Github page: https://github.com/jokergoo/circlize
Documentation: https://jokergoo.github.io/circlize_book/book/
If you use it in published research, please cite:
Gu, Z. circlize implements and enhances circular visualization
in R. Bioinformatics 2014.
This message can be suppressed by:
suppressPackageStartupMessages(library(circlize))
========================================
require(phylosmith)
Loading required package: phylosmith
require(phangorn)
Loading required package: phangorn
Warning: package ‘phangorn’ was built under R version 4.1.2Loading required package: ape
Warning: package ‘ape’ was built under R version 4.1.2
Attaching package: ‘ape’
The following object is masked from ‘package:circlize’:
degree
Attaching package: ‘phangorn’
The following objects are masked from ‘package:vegan’:
diversity, treedist
require(gridExtra)
Loading required package: gridExtra
Attaching package: ‘gridExtra’
The following object is masked from ‘package:dplyr’:
combine
The following object is masked from ‘package:randomForest’:
combine
require(pairwiseAdonis)
Loading required package: pairwiseAdonis
Loading required package: cluster
Warning: package ‘cluster’ was built under R version 4.1.2
require(car)
Loading required package: car
Warning: package ‘car’ was built under R version 4.1.2Loading required package: carData
Warning: package ‘carData’ was built under R version 4.1.2
Attaching package: ‘car’
The following object is masked from ‘package:purrr’:
some
The following object is masked from ‘package:dplyr’:
recode
Load necessary files from Dada2 processing script.
load("/Users/gordoncuster/Desktop/Git_Projects/BeeBread/Data/16S/DADA2outputs/dada2_outputs_16S_BeeBread_wtree.RData")
Read in metadata
metadata<-read.csv("/Users/gordoncuster/Desktop/Git_Projects/BeeBread/Data/BB_sample-metadata.txt", sep = "\t")
rownames(metadata)<-metadata$X
metadata$Region_Site<-paste( metadata$Region, metadata$Site, sep = "")
metadata$Treatment_Site<-paste( metadata$Treatment, metadata$Site, sep = "")
Convert files to correct format for phyloseq object and then rename samples in OTU table to match the format of the metadata file. Create phyloseq object and root phylogenetic tree for usage with distance metrics (e.g., Unifrac).
#assign object type to merge into phyloseq object
md<-sample_data(metadata)
otu<-otu_table(seqtab.nochim, taxa_are_rows = F)
tax_tab<-tax_table(taxa)
#check sample names of both otu table and metadata
#remove .fastq.gz from otu table names
sample_names(otu)<-str_split(sample_names(otu), pattern = ".fastq.gz", simplify = T) [,1]
#since they match, you can create the phyloseq object
bb16S_orig<-phyloseq(md, otu, tax_tab, fitGTR$tree)
#root tree for distance metrics like unifrac
set.seed(11)
phy_tree(bb16S_orig)<-root(phy_tree(bb16S_orig), sample(taxa_names(bb16S_orig), 1), resolve.root = TRUE)
is.rooted(phy_tree(bb16S_orig))
[1] TRUE
#Pre-processing. Rename OTU IDS to something more manageable. We save the ASCV sequences in case we need them later (e.g., BLASTN searches). Create Bacterial and Archaeal phyloseq objects, but first we remove Chloroplasts.
#Extract original IDs for future reference.
seq_df_w_OTUID<-data.frame(OTUID = 1:ntaxa(bb16S_orig), Sequence = taxa_names(bb16S_orig))
taxa_names(bb16S_orig)<-paste("OTU_" , 1:ntaxa(bb16S_orig), sep = "")
#remove chloroplast
#978 chloroplast or mitochondrial taxa
bb16S_orig_nc<-subset_taxa(bb16S_orig, Family!= "Mitochondria" | is.na(Family) & Order!="Chloroplast" | is.na(Order))
#1337 bacterial taxa - 9 non-bacteria removed.
bb16S_bac<-subset_taxa(bb16S_orig_nc, Kingdom == "Bacteria")
#9 archaea
bb16S_arch<-subset_taxa(bb16S_orig_nc, Kingdom == "Archaea")
Remove control samples as there is only as single replicate of each.
ps_wo_control<-subset_samples(bb16S_bac, Treatment != "none")
Examine rarefaction curves. Plateau in most samples under the 1.5k read mark.
rarecurve(otu_table(ps_wo_control), step=50, cex=0.5, main = "Rarefaction Curve", ylab="Richness", xlab="Read Depth")
set.seed(11)
sort(sample_sums(ps_wo_control))
Dal_IPM1054_A Dal_CON1058_A Dal_CON1057_A Spr_CON3017_A Dal_IPM1055_A Dal_CF1052_A Spr_CF3016_A Dal_IPM1055_B
409 496 894 1188 1378 1469 1718 1962
Dal_CON1058_B Sum_IPM4002_B Spr_CON3017_B Sla_CF2066_B Sla_IPM2071_B Dal_CF1051_A Dal_CF1051_B Dal_IPM1054_B
2036 2037 2254 2287 2288 2318 2359 2370
Dal_CON1057_B Sla_CF2065_B Spr_CON3019_A Sla_IPM2070_A Spr_CON3019_Be2 Spr_CF3013_A Spr_IPM3023_B Sla_CON2061_A
2396 2744 2835 2864 2886 3503 3513 3871
Spr_IPM3023_A Sum_CON4011_B Bel_IPM3042_A Sla_IPM2070_B Dal_CF1052_B Sum_IPM4001_A Sla_CF2066_A Bel_CF3047_A
3898 4039 4196 4553 4710 4721 4749 4845
Sum_CON4011_A Sum_IPM4001_B Spr_CF3016_B Sum_CF4006_B Sum_CF4005_A Bel_CF3045_B Sla_CF2065_A Sum_IPM4002_A
5502 5625 5693 5819 5935 5938 6648 6815
Spr_IPM3024_A Ber_CF2006_B Bel_CON3039_A Sum_CON4010_B Sla_CON2063_A Ber_IPM2002_B Spr_IPM3024_B Spr_CF3013_B
6930 7047 7119 7147 7253 7462 7669 7888
Bel_CON3037_A Sum_CF4006_A Bel_IPM3042_B Ber_CON2011_Be2 Sum_CF4005_B Bel_CF3045_A Sum_CON4010_A Ber_CON2010_B
8129 8276 9187 9315 9820 10444 10531 11063
Ber_CON2011_A Bel_IPM3044_B Sla_IPM2071_A Bel_CON3039_B Sla_CON2061_B Bel_CF3047_B Ber_CF2007_A Bel_IPM3044_A
11158 11247 12197 12295 13463 14099 15170 15601
Ber_CF2007_B Ber_IPM2002_A Ber_CON2010_A Bel_CON3037_B Ber_IPM2001_A Ber_IPM2001_B Ber_CF2006_A Sla_CON2063_B
17086 18639 19194 20875 21680 22834 33732 47127
summary(sample_sums(ps_wo_control))
Min. 1st Qu. Median Mean 3rd Qu. Max.
409 2812 5877 8020 10466 47127
sd(sample_sums(ps_wo_control))
[1] 7801.64
bb16S_bac_rarefy<-rarefy_even_depth(ps_wo_control, sample.size = 1100, rngseed = 14, trimOTUs = T)
`set.seed(14)` was used to initialize repeatable random subsampling.
Please record this for your records so others can reproduce.
Try `set.seed(14); .Random.seed` for the full vector
...
3 samples removedbecause they contained fewer reads than `sample.size`.
Up to first five removed samples are:
Dal_CON1057_ADal_CON1058_ADal_IPM1054_A
...
486OTUs were removed because they are no longer
present in any sample after random subsampling
...
#517 otus were removed during rarefaction.
#To move forward, we can hellinger transform and go from there.
bb16S_bac_hellinger<-transform_sample_counts(ps_wo_control, function(x) sqrt(x / sum(x)))
#maximum liklihood point estimates
bb16S_bac_ML<- transform_sample_counts(ps_wo_control, function(x) x / sum(x))
#Alpha Diversity
sf2a <- plot_richness(bb16S_bac_rarefy, x = "Site", measures = c("Shannon", "Observed", "Chao1"), color = "Site") + geom_boxplot() + ggtitle("Sampling Location") + theme_classic() + xlab("Sampling Location") + theme(axis.text=element_text(size=12, angle = 45, hjust = 0.75),
axis.title=element_text(size=14))
sf2b <- plot_richness(bb16S_bac_rarefy, x = "Site", measures = c("Shannon", "Observed", "Chao1"), color = "Treatment") + geom_boxplot() + ggtitle("Sampling Location by Treatment") + theme_classic() + ylab("") + xlab("Sampling Location") +theme(axis.text=element_text(size=12, angle = 45, hjust = 0.75),
axis.title=element_text(size=14))
sf2c <-plot_richness(bb16S_bac_rarefy, x = "Site", measures = c("Shannon", "Observed", "Chao1"), color = "Timepoint") + geom_boxplot() + ggtitle("Sampling Location by Sammpling Time") + theme_classic() + xlab("Sampling Location") +theme(axis.text=element_text(size=12, angle = 45, hjust = 0.75),
axis.title=element_text(size=14))
sf2d <-plot_richness(bb16S_bac_rarefy, x = "Timepoint", measures = c("Shannon", "Observed", "Chao1"), color = "Treatment") + geom_boxplot() + ggtitle("Treatment by Sampling Time") + theme_classic() + ylab("") + xlab("Sampling Timepoint") +theme(axis.text=element_text(size=12, angle = 45, hjust =0.75),
axis.title=element_text(size=14))
sf2 <- grid.arrange(sf2a, sf2b, sf2c, sf2d)
plot(sf2)
Calculate alpha diversity metrics
#create alpha diversity table and prep data to include grouping columns
richness_BB16S_bac<-estimate_richness(bb16S_bac_rarefy, measures = c("Shannon", "Observed", "Chao1"))
#add metadata
richness_BB16S_bac$Site<-sample_data(bb16S_bac_rarefy)$Site
richness_BB16S_bac$Timepoint<-sample_data(bb16S_bac_rarefy)$Timepoint
richness_BB16S_bac$Treatment<-sample_data(bb16S_bac_rarefy)$Treatment
richness_BB16S_bac$Treatment_Timepoint<-sample_data(bb16S_bac_rarefy)$Treatment_Timepoint
Tests of Shannon diversity
#ANOVA and Tukey's PW comparisons
#Run a full 3 way ANOVA with interactions
mod<-aov(Shannon ~ Site * Treatment * Timepoint, data = richness_BB16S_bac)
#residuals look fine.
shapiro.test(mod$residuals)
hist(mod$residuals)
summary(mod)
#Summary of model shows site and timepoint significant predictors
#Interaction of site and timepoint
#test of heterogentity
leveneTest(Shannon ~ Site, data = richness_BB16S_bac)
leveneTest(Shannon ~ Treatment, data = richness_BB16S_bac)
leveneTest(Shannon ~ Timepoint, data = richness_BB16S_bac)
#significant Levene's test for Site. We will run an additional Kruskal Wallis for site.
kruskal.test(Shannon ~ Site, data = richness_BB16S_bac)
#What about pairwise differences in site
TukeyHSD(mod, "Site")
plot(TukeyHSD(mod, "Site"), las=1, cex.axis = 0.4)
#Several significant pairwise differences to accompany the significant global test for interaction term. We can
TukeyHSD(mod, "Site:Timepoint")
plot(TukeyHSD(mod, "Site:Timepoint"), las=1, cex.axis = 0.4)
Tests of Chao 1
#ANOVA and Tukey's PW comparisons
#Run a full 3 way ANOVA with interactions
mod<-aov(Chao1 ~ Site * Treatment * Timepoint, data = richness_BB16S_bac)
#residuals look fine.
shapiro.test(mod$residuals)
hist(mod$residuals)
summary(mod)
leveneTest(Chao1 ~ Site, data = richness_BB16S_bac)
leveneTest(Chao1 ~ Treatment, data = richness_BB16S_bac)
leveneTest(Chao1 ~ Timepoint, data = richness_BB16S_bac)
#Site and the interactions with site are significant.
TukeyHSD(mod, "Site")
plot(TukeyHSD(mod, "Site"), las=1, cex.axis = 0.4)
Tests of Observed - unique taxa in a sample.
#ANOVA and Tukey's PW comparisons
#Run a full 3 way ANOVA with interactions
mod<-aov(Observed ~ Site * Treatment * Timepoint, data = richness_BB16S_bac)
#residuals look fine.
shapiro.test(mod$residuals)
summary(mod)
hist(mod$residuals)
leveneTest(Observed ~ Site, data = richness_BB16S_bac)
leveneTest(Observed ~ Treatment, data = richness_BB16S_bac)
leveneTest(Observed ~ Timepoint, data = richness_BB16S_bac)
#Summary of model shows site and timepoint to be a significant predictor as well as the site interaction terms.
TukeyHSD(mod, "Site")
plot(TukeyHSD(mod, "Site"), las=1, cex.axis = 0.4)
#Beta diversity
#extract data from phyloseq object
tab_adonis<-data.frame(otu_table(bb16S_bac_ML))
sd_adonis<-data.frame(sample_data(bb16S_bac_ML))
sd_adonis$Treatment <-as.factor(sd_adonis$Treatment)
sd_adonis$Site <-as.factor(sd_adonis$Site)
dist_w<-phyloseq::distance(bb16S_bac_ML, method = "wunifrac")
dist_uw<-phyloseq::distance(bb16S_bac_ML, method = "unifrac")
#full model with no strata arguemnt. Essentially the effect of each main effect without controlling for any potential groups.
#weighted
adonis2(dist_w ~ Site * Treatment * Timepoint, data= sd_adonis, permutations = 1000)
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Permutation: free
Number of permutations: 1000
adonis2(formula = dist_w ~ Site * Treatment * Timepoint, data = sd_adonis, permutations = 1000)
Df SumOfSqs R2 F Pr(>F)
Site 5 0.54622 0.31058 8.1498 0.000999 ***
Treatment 2 0.02938 0.01671 1.0960 0.362637
Timepoint 1 0.05705 0.03244 4.2560 0.023976 *
Site:Treatment 10 0.31006 0.17630 2.3131 0.005994 **
Site:Timepoint 5 0.22812 0.12971 3.4037 0.003996 **
Treatment:Timepoint 2 0.02040 0.01160 0.7611 0.548452
Site:Treatment:Timepoint 10 0.08489 0.04827 0.6333 0.872128
Residual 36 0.48256 0.27439
Total 71 1.75868 1.00000
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#unweighted
adonis2(dist_uw ~ Site * Treatment * Timepoint, data= sd_adonis, permutations = 1000)
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Permutation: free
Number of permutations: 1000
adonis2(formula = dist_uw ~ Site * Treatment * Timepoint, data = sd_adonis, permutations = 1000)
Df SumOfSqs R2 F Pr(>F)
Site 5 2.2367 0.15888 2.7682 0.000999 ***
Treatment 2 0.4101 0.02913 1.2690 0.042957 *
Timepoint 1 0.4782 0.03397 2.9590 0.000999 ***
Site:Treatment 10 1.7974 0.12768 1.1122 0.075924 .
Site:Timepoint 5 1.1986 0.08514 1.4834 0.000999 ***
Treatment:Timepoint 2 0.3791 0.02693 1.1731 0.129870
Site:Treatment:Timepoint 10 1.7600 0.12502 1.0891 0.118881
Residual 36 5.8178 0.41325
Total 71 14.0780 1.00000
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
pairwise.adonis2(dist_uw ~ Treatment + Timepoint, strata = "Site", data= sd_adonis, nperm = 1000)
$parent_call
[1] "dist_uw ~ Treatment + Timepoint , strata = Site , permutations 1000"
$CF_vs_CON
Df SumOfSqs R2 F Pr(>F)
Treatment 1 0.1895 0.01997 0.9580 0.339660
Timepoint 1 0.3959 0.04173 2.0016 0.000999 ***
Residual 45 8.9000 0.93829
Total 47 9.4853 1.00000
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
$CF_vs_ORG
Df SumOfSqs R2 F Pr(>F)
Treatment 1 0.2167 0.02362 1.1425 0.091908 .
Timepoint 1 0.4244 0.04624 2.2372 0.000999 ***
Residual 45 8.5358 0.93014
Total 47 9.1769 1.00000
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
$CON_vs_ORG
Df SumOfSqs R2 F Pr(>F)
Treatment 1 0.2090 0.02250 1.0745 0.103896
Timepoint 1 0.3257 0.03507 1.6743 0.000999 ***
Residual 45 8.7540 0.94243
Total 47 9.2887 1.00000
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
attr(,"class")
[1] "pwadstrata" "list"
#with strata argument
adonis2(dist_w ~ Site * Treatment * Timepoint, data= sd_adonis, strata = sd_adonis$Site, permutations = 1000)
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Blocks: strata
Permutation: free
Number of permutations: 1000
adonis2(formula = dist_w ~ Site * Treatment * Timepoint, data = sd_adonis, permutations = 1000, strata = sd_adonis$Site)
Df SumOfSqs R2 F Pr(>F)
Site 5 0.54622 0.31058 8.1498 0.020979 *
Treatment 2 0.02938 0.01671 1.0960 0.352647
Timepoint 1 0.05705 0.03244 4.2560 0.020979 *
Site:Treatment 10 0.31006 0.17630 2.3131 0.017982 *
Site:Timepoint 5 0.22812 0.12971 3.4037 0.004995 **
Treatment:Timepoint 2 0.02040 0.01160 0.7611 0.500500
Site:Treatment:Timepoint 10 0.08489 0.04827 0.6333 0.841159
Residual 36 0.48256 0.27439
Total 71 1.75868 1.00000
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
adonis2(dist_uw ~ Site * Treatment * Timepoint, data= sd_adonis, strata = sd_adonis$Site, permutations = 1000)
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Blocks: strata
Permutation: free
Number of permutations: 1000
adonis2(formula = dist_uw ~ Site * Treatment * Timepoint, data = sd_adonis, permutations = 1000, strata = sd_adonis$Site)
Df SumOfSqs R2 F Pr(>F)
Site 5 2.2367 0.15888 2.7682 0.000999 ***
Treatment 2 0.4101 0.02913 1.2690 0.025974 *
Timepoint 1 0.4782 0.03397 2.9590 0.000999 ***
Site:Treatment 10 1.7974 0.12768 1.1122 0.072927 .
Site:Timepoint 5 1.1986 0.08514 1.4834 0.000999 ***
Treatment:Timepoint 2 0.3791 0.02693 1.1731 0.110889
Site:Treatment:Timepoint 10 1.7600 0.12502 1.0891 0.122877
Residual 36 5.8178 0.41325
Total 71 14.0780 1.00000
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
##PW adonis w/ strata = site
pairwise.adonis2(dist_uw ~ Treatment + Timepoint, strata = "Site", data= sd_adonis, nperm = 10000)
$parent_call
[1] "dist_uw ~ Treatment + Timepoint , strata = Site , permutations 10000"
$CF_vs_CON
Df SumOfSqs R2 F Pr(>F)
Treatment 1 0.1895 0.01997 0.9580 0.3471
Timepoint 1 0.3959 0.04173 2.0016 9.999e-05 ***
Residual 45 8.9000 0.93829
Total 47 9.4853 1.00000
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
$CF_vs_ORG
Df SumOfSqs R2 F Pr(>F)
Treatment 1 0.2167 0.02362 1.1425 0.1021
Timepoint 1 0.4244 0.04624 2.2372 9.999e-05 ***
Residual 45 8.5358 0.93014
Total 47 9.1769 1.00000
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
$CON_vs_ORG
Df SumOfSqs R2 F Pr(>F)
Treatment 1 0.2090 0.02250 1.0745 0.1106889
Timepoint 1 0.3257 0.03507 1.6743 0.0005999 ***
Residual 45 8.7540 0.94243
Total 47 9.2887 1.00000
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
attr(,"class")
[1] "pwadstrata" "list"
pairwise.adonis2(dist_w ~ Treatment + Timepoint, strata = "Site", data= sd_adonis, nperm = 10000)
$parent_call
[1] "dist_w ~ Treatment + Timepoint , strata = Site , permutations 10000"
$CF_vs_CON
Df SumOfSqs R2 F Pr(>F)
Treatment 1 0.01940 0.01593 0.7435 0.2974
Timepoint 1 0.02461 0.02021 0.9433 0.2084
Residual 45 1.17413 0.96387
Total 47 1.21814 1.00000
$CF_vs_ORG
Df SumOfSqs R2 F Pr(>F)
Treatment 1 0.00541 0.00503 0.2390 0.82922
Timepoint 1 0.05073 0.04721 2.2416 0.06059 .
Residual 45 1.01844 0.94776
Total 47 1.07458 1.00000
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
$CON_vs_ORG
Df SumOfSqs R2 F Pr(>F)
Treatment 1 0.01927 0.01592 0.7593 0.30827
Timepoint 1 0.04896 0.04046 1.9296 0.05809 .
Residual 45 1.14173 0.94362
Total 47 1.20996 1.00000
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
attr(,"class")
[1] "pwadstrata" "list"
Ordinations
#weighted Unifrac
ord_nmds<-ordinate(physeq = bb16S_bac_ML, method = "NMDS", distance = "wunifrac")
ord_nmds$stress
plot_ordination(physeq = bb16S_bac_ML, ordination = ord_nmds, type = "Samples", color = "Site", shape = "Treatment" ) + theme_classic() + ggtitle("Weighted Unifrac NMDS") + theme(plot.title = element_text(hjust = 0.5, size = 18)) + geom_point(size = 3) + theme(axis.text = element_text(size = 14), axis.title=element_text(size=14))
#ggsave("../../Writing/Final_Figs/Weighted_Unifrac_NMDS.eps")
#strongest grouping by Site. Time point also has a smaller effect, but these groupings are more clear than in the CAP ordination. There are several outlier samples that ordinate far to the right on the NMDS1 axis.
#unweighted Unifrac
ord_nmds<-ordinate(physeq = bb16S_bac_ML, method = "NMDS", distance = "unifrac")
ord_nmds$stress
plot_ordination(physeq = bb16S_bac_ML, ordination = ord_nmds, type = "Samples", color = "Site", shape = "Treatment" ) + theme_classic() + ggtitle("Unweighted Unifrac NMDS") + theme(plot.title = element_text(hjust = 0.5, size = 18)) + geom_point(size = 3) + theme(axis.text = element_text(size = 14), axis.title=element_text(size=14))
#ggsave("../../Writing/Final_Figs/Unweighted_Unifrac_NMDS.eps")
Sampling time 2 (post treatment)
#post-treatment samples
bb16S_bac_ML_A<-subset_samples(bb16S_bac_ML, Timepoint == "A")
#extract data from phyloseq object for post-treatment
sd_adonis<-data.frame(sample_data(bb16S_bac_ML_A))
sd_adonis$Treatment <-as.factor(sd_adonis$Treatment)
sd_adonis$Site <-as.factor(sd_adonis$Site)
dist_wb<-phyloseq::distance(bb16S_bac_ML_A, method = "wunifrac")
dist_uwb<-phyloseq::distance(bb16S_bac_ML_A, method = "unifrac")
#with strata argument
adonis2(dist_wb ~ Site * Treatment, data= sd_adonis, strata = sd_adonis$Site, permutations = 1000)
adonis2(dist_uwb ~ Site * Treatment, data= sd_adonis, strata = sd_adonis$Site, permutations = 1000)
pairwise.adonis2(dist_uwb ~Treatment, strata = "Site", data = sd_adonis, nperm = 1000)
#ordinations
#weighted Unifrac
ord_nmds<-ordinate(physeq = bb16S_bac_ML_A, method = "NMDS", distance = "wunifrac")
ord_nmds$stress
plot_ordination(physeq = bb16S_bac_ML_A, ordination = ord_nmds, type = "Samples", color = "Site", shape = "Treatment" ) + theme_classic() + ggtitle("Weighted Unifrac NMDS") + theme(plot.title = element_text(hjust = 0.5, size = 18)) + geom_point(size = 3)
#ggsave("../../Writing/Final_Figs/Time2_Weighted_Unifrac_NMDS.pdf")
#unweighted Unifrac
ord_nmds<-ordinate(physeq = bb16S_bac_ML_A, method = "NMDS", distance = "unifrac")
ord_nmds$stress
plot_ordination(physeq = bb16S_bac_ML_A, ordination = ord_nmds, type = "Samples", color = "Site", shape = "Treatment" ) + theme_classic() + ggtitle("Unweighted Unifrac NMDS") + theme(plot.title = element_text(hjust = 0.5, size = 18)) + geom_point(size = 3)
#ggsave("../../Writing/Final_Figs/Time2_Unweighted_Unifrac_NMDS.pdf")
create taxa bar plot to display makeup
sort(table(data.frame(tax_table(bb16S_bac_rarefy))$Phylum), decreasing = TRUE)
length(unique(data.frame(tax_table(bb16S_bac_rarefy))$Phylum))
sort(table(data.frame(tax_table(bb16S_bac_rarefy))$Order), decreasing = TRUE)
length(unique(data.frame(tax_table(bb16S_bac_rarefy))$Order))
sort(table(data.frame(tax_table(bb16S_bac_rarefy))$Family), decreasing = TRUE)
length(unique(data.frame(tax_table(bb16S_bac_rarefy))$Family))
plot_bar(subset_samples(bb16S_bac_rarefy, Timepoint == "A"), fill = "Phylum") + scale_fill_manual(values = col_vector) + geom_bar(stat = "identity")+ coord_flip()
#ggsave("../../Writing/Final_Figs/taxabarA.pdf")
plot_bar(subset_samples(bb16S_bac_rarefy, Timepoint == "B"), fill = "Phylum") + scale_fill_manual(values = col_vector) + geom_bar(stat = "identity")+ coord_flip()
#ggsave("../../Writing/Final_Figs/taxabarB.pdf")
#Core analysis
#create super long color vector
col_vector <- c("#000000", "#FFFF00", "#1CE6FF", "#FF34FF", "#FF4A46", "#008941", "#006FA6", "#A30059",
"#FFDBE5", "#7A4900", "#0000A6", "#63FFAC", "#B79762", "#004D43", "#8FB0FF", "#997D87",
"#5A0007", "#809693", "#FEFFE6", "#1B4400", "#4FC601", "#3B5DFF", "#4A3B53", "#FF2F80",
"#61615A", "#BA0900", "#6B7900", "#00C2A0", "#FFAA92", "#FF90C9", "#B903AA", "#D16100",
"#DDEFFF", "#000035", "#7B4F4B", "#A1C299", "#300018", "#0AA6D8", "#013349", "#00846F",
"#372101", "#FFB500", "#C2FFED", "#A079BF", "#CC0744", "#C0B9B2", "#C2FF99", "#001E09",
"#00489C", "#6F0062", "#0CBD66", "#EEC3FF", "#456D75", "#B77B68", "#7A87A1", "#788D66",
"#885578", "#FAD09F", "#FF8A9A", "#D157A0", "#BEC459", "#456648", "#0086ED", "#886F4C",
"#34362D", "#B4A8BD", "#00A6AA", "#452C2C", "#636375", "#A3C8C9", "#FF913F", "#938A81",
"#575329", "#00FECF", "#B05B6F", "#8CD0FF", "#3B9700", "#04F757", "#C8A1A1", "#1E6E00",
"#7900D7", "#A77500", "#6367A9", "#A05837", "#6B002C", "#772600", "#D790FF", "#9B9700",
"#549E79", "#FFF69F", "#201625", "#72418F", "#BC23FF", "#99ADC0", "#3A2465", "#922329",
"#5B4534", "#FDE8DC", "#404E55", "#0089A3", "#CB7E98", "#A4E804", "#324E72", "#6A3A4C",
"#83AB58", "#001C1E", "#D1F7CE", "#004B28", "#C8D0F6", "#A3A489", "#806C66", "#222800",
"#BF5650", "#E83000", "#66796D", "#DA007C", "#FF1A59", "#8ADBB4", "#1E0200", "#5B4E51",
"#C895C5", "#320033", "#FF6832", "#66E1D3", "#CFCDAC", "#D0AC94", "#7ED379", "#012C58",
"#7A7BFF", "#D68E01", "#353339", "#78AFA1", "#FEB2C6", "#75797C", "#837393", "#943A4D",
"#B5F4FF", "#D2DCD5", "#9556BD", "#6A714A", "#001325", "#02525F", "#0AA3F7", "#E98176",
"#DBD5DD", "#5EBCD1", "#3D4F44", "#7E6405", "#02684E", "#962B75", "#8D8546", "#9695C5",
"#E773CE", "#D86A78", "#3E89BE", "#CA834E", "#518A87", "#5B113C", "#55813B", "#E704C4",
"#00005F", "#A97399", "#4B8160", "#59738A", "#FF5DA7", "#F7C9BF", "#643127", "#513A01",
"#6B94AA", "#51A058", "#A45B02", "#1D1702", "#E20027", "#E7AB63", "#4C6001", "#9C6966",
"#64547B", "#97979E", "#006A66", "#391406", "#F4D749", "#0045D2", "#006C31", "#DDB6D0",
"#7C6571", "#9FB2A4", "#00D891", "#15A08A", "#BC65E9", "#FFFFFE", "#C6DC99", "#203B3C",
"#671190", "#6B3A64", "#F5E1FF", "#FFA0F2", "#CCAA35", "#374527", "#8BB400", "#797868",
"#C6005A", "#3B000A", "#C86240", "#29607C", "#402334", "#7D5A44", "#CCB87C", "#B88183",
"#AA5199", "#B5D6C3", "#A38469", "#9F94F0", "#A74571", "#B894A6", "#71BB8C", "#00B433",
"#789EC9", "#6D80BA", "#953F00", "#5EFF03", "#E4FFFC", "#1BE177", "#BCB1E5", "#76912F",
"#003109", "#0060CD", "#D20096", "#895563", "#29201D", "#5B3213", "#A76F42", "#89412E",
"#1A3A2A", "#494B5A", "#A88C85", "#F4ABAA", "#A3F3AB", "#00C6C8", "#EA8B66", "#958A9F",
"#BDC9D2", "#9FA064", "#BE4700", "#658188", "#83A485", "#453C23", "#47675D", "#3A3F00",
"#061203", "#DFFB71", "#868E7E", "#98D058", "#6C8F7D", "#D7BFC2", "#3C3E6E", "#D83D66",
"#2F5D9B", "#6C5E46", "#D25B88", "#5B656C", "#00B57F", "#545C46", "#866097", "#365D25",
"#252F99", "#00CCFF", "#674E60", "#FC009C", "#92896B")
calculate core at genus and asv level using rarefied data
#calculate core at genus level
#325 genera
bb16S_bac_rarefy_tax_genus<-tax_glom(physeq = bb16S_bac_rarefy, taxrank = "Genus", NArm = F)
#calculate core
core_genus<-taxa_core(phyloseq_obj = bb16S_bac_rarefy_tax_genus, treatment = "Site", frequency = 1, abundance_threshold = 0.0001)
Warning: Column 'Sample' does not exist to removeWarning: id.vars and measure.vars are internally guessed when both are 'NULL'. All non-numeric/integer/logical type columns are considered id.vars, which in this case are columns [OTU, ...]. Consider providing at least one of 'id' or 'measure' vars in future.
core_genus<-taxa_core(phyloseq_obj = core_genus, frequency = 0.5, abundance_threshold = 0.0001)
Warning: Column 'Sample' does not exist to removeWarning: id.vars and measure.vars are internally guessed when both are 'NULL'. All non-numeric/integer/logical type columns are considered id.vars, which in this case are columns [OTU, ...]. Consider providing at least one of 'id' or 'measure' vars in future.
otu_genus_core <- data.frame(otu_table(core_genus))
tax_genus_core <- data.frame(tax_table(core_genus))
samp_data_genus_core <- data.frame(sample_data(core_genus))
percent_abund<-(otu_genus_core[,]/1100)
write.csv(percent_abund, "/Users/gordoncuster/Desktop/Git_Projects/BeeBread/Writing/core_genera_percent_abund.csv")
Core Genus
#core asv adonis
tab_adonis<-data.frame(otu_table(core_genus))
sd_adonis<-data.frame(sample_data(core_genus))
sd_adonis$Treatment <-as.factor(sd_adonis$Treatment)
sd_adonis$Site <-as.factor(sd_adonis$Site)
dist_core_w_genus<-phyloseq::distance(core_genus, method = "wunifrac")
dist_core_uw_genus<-phyloseq::distance(core_genus, method = "unifrac")
#weighted unifrac
adonis2(dist_core_w_genus ~ Site * Treatment * Timepoint, data= sd_adonis, strata = sd_adonis$Site, permutations = 1000)
#unweighted unifrac
adonis2(dist_core_uw_genus ~ Site * Treatment * Timepoint, data= sd_adonis, strata = sd_adonis$Site, permutations = 1000)
#ordinations
#weighted Unifrac
ord_nmds<-ordinate(physeq = core_genus, method = "NMDS", distance = "wunifrac")
ord_nmds$stress
plot_ordination(physeq = core_genus, ordination = ord_nmds, type = "Samples", color = "Site", shape = "Treatment" ) + theme_classic() + ggtitle("Core Genera Weighted Unifrac NMDS") + theme(plot.title = element_text(hjust = 0.5, size = 18)) + geom_point(size = 3) + theme(axis.text = element_text(size = 14), axis.title=element_text(size=14))
ggsave("../../Writing/Final_Figs/CoreGenus_Weighted_Unifrac_NMDS.eps")
#strongest grouping by Site. Time point also has a smaller effect, but these groupings are more clear than in the CAP ordination. There are several outlier samples that ordinate far to the right on the NMDS1 axis.
#unweighted Unifrac
ord_nmds<-ordinate(physeq = core_genus, method = "NMDS", distance = "unifrac")
ord_nmds$stress
plot_ordination(physeq = core_genus, ordination = ord_nmds, type = "Samples", color = "Site", shape = "Treatment" ) + theme_classic() + ggtitle("Core Genera Unweighted Unifrac NMDS") + theme(plot.title = element_text(hjust = 0.5, size = 18)) + geom_point(size = 3) + theme(axis.text = element_text(size = 14), axis.title=element_text(size=14))
ggsave("../../Writing/Final_Figs/CoreGenus_Unweighted_Unifrac_NMDS.eps")
Core genus at time point 2
core_genus_A <- subset_samples(core_genus, Timepoint == "A")
#core asv adonis
tab_adonis<-data.frame(otu_table(core_genus_A))
sd_adonis<-data.frame(sample_data(core_genus_A))
sd_adonis$Treatment <-as.factor(sd_adonis$Treatment)
sd_adonis$Site <-as.factor(sd_adonis$Site)
dist_core_w_genus<-phyloseq::distance(core_genus_A, method = "wunifrac")
dist_core_uw_genus<-phyloseq::distance(core_genus_A, method = "unifrac")
#weighted unifrac
adonis2(dist_core_w_genus ~ Site * Treatment, data= sd_adonis, strata = sd_adonis$Site, permutations = 1000)
#unweighted unifrac
adonis2(dist_core_uw_genus ~ Site * Treatment, data= sd_adonis, strata = sd_adonis$Site, permutations = 1000)
Visualization of core microbiome using barplot
#pull out core genera names
core_genus_names<-rownames(tax_table(core_genus))
genus_OTU_table<-data.frame(t(otu_table(bb16S_bac_rarefy_tax_genus)))
#check orientation of otu table
rownames(genus_OTU_table)
#add columns corresponding to core status, % sites, % reads
genus_OTU_table$Core <- NA
for(i in 1:nrow(genus_OTU_table)){
if(rownames(genus_OTU_table)[i] %in% core_genus_names == T){
genus_OTU_table[i,]$Core <- "Core"
} else{
genus_OTU_table[i,]$Core <- "NonCore"
}
}
#Prevalence column
genus_OTU_table$prev <- NA
for(i in 1:nrow(genus_OTU_table)){
genus_OTU_table[i,]$prev<-table(genus_OTU_table[i,(1:69)] > 0)["TRUE"]/ 69}
#proportional abundance
genus_OTU_table$abund <- NA
for(i in 1:nrow(genus_OTU_table)){
genus_OTU_table[i,]$abund <- rowSums(genus_OTU_table[i,c(1:69)])/sum(colSums(genus_OTU_table[,c(1:69)]))
}
dat<-genus_OTU_table[,c(70:72)]
dat$otu <- as.factor(rownames(dat))
dat$otu <- reorder(dat$otu, -dat$prev )
dat$Phylum <- as.factor(data.frame(tax_table(bb16S_bac_rarefy_tax_genus))$Phylum)
dat$number <- 1:nrow(dat)
ggplot(dat, aes(x = otu, y=prev, fill = Phylum)) +
geom_bar(stat="identity") + scale_fill_manual(values=col_vector) + theme_classic() +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank()) + ylab("Prevalance") + theme(legend.position = "none")
#ggsave("../../Writing/Final_Figs/core_prev_owlegend.eps")
ggplot(dat, aes(x = otu, y=abund, fill = Phylum)) +
geom_bar(stat="identity") + scale_fill_manual(values=col_vector) + theme_classic() + scale_y_reverse() + theme(axis.text.x = element_text(size = 4 , angle = 45, hjust = 1))
#ggsave("../../Writing/Final_Figs/core_abund.eps")
ggplot(dat, aes(x = otu, y=abund, fill = Phylum)) +
geom_bar(stat="identity") + scale_fill_manual(values=col_vector) + theme_classic() + scale_y_reverse() + theme(axis.text.x = element_text(size = 4 , angle = 45, hjust = 1)) + theme(legend.position = "none") + ylab("Proportional Abundance") + xlab("OTU ID")
#ggsave("../../Writing/Final_Figs/core_abund_wolegend.eps")
#color by core inclusion
ggplot(dat, aes(x = otu, y=prev, fill = Core)) +
geom_bar(stat="identity") + scale_fill_manual(values=c("#88CCEE", "#999933")) +
theme_classic() +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank()) + ylab("Prevalance") + theme(legend.position = "none") + theme(axis.text.y = element_text(size=12), axis.title.y = element_text(size = 16))
ggsave("../../Writing/Final_Figs/core_prev_owlegend_Core.eps")
ggplot(dat, aes(x = otu, y=abund, fill = Core)) +
geom_bar(stat="identity") + scale_fill_manual(values=c("#88CCEE", "#999933")) +
theme_classic() + scale_y_reverse() + theme(axis.text.x = element_text(size = 4 , angle = 45, hjust = 1, face = "bold"))+ theme( axis.text.x=element_blank(), axis.ticks.x=element_blank()) + theme(axis.text.y = element_text(size=12), axis.title.y = element_text(size = 16))
ggsave("../../Writing/Final_Figs/core_abund_Core.eps")
ggplot(dat, aes(x = otu, y=abund, fill = Core)) +
geom_bar(stat="identity") + scale_fill_manual(values=c("#88CCEE", "#999933")) +
theme_classic() + scale_y_reverse() +# theme(axis.text.x = element_text(size = 4 , angle = 45, hjust = 1)) +
theme(legend.position = "none") + ylab("Proportional Abundance") + xlab("OTU") + theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) + theme(axis.text.y = element_text(size=12), axis.title.y = element_text(size = 16))
ggsave("../../Writing/Final_Figs/core_abund_wolegend_Core.eps")
analysis of lactic acid bacteria and acetic acid bacteria at time point 2 (following application)
bb16S_bac_ML_A<-subset_samples(bb16S_bac_ML, Timepoint == "A")
#ACETO
#36 taxa
acetobacter <- subset_taxa(bb16S_bac_ML_A, Family =="Acetobacteraceae")
acetobacter_glomed<- tax_glom(acetobacter, taxrank = "Family")
Warning in FUN(X[[i]], ...) :
merge_taxa attempted to reduce tree to 1 or fewer tips.
tree replaced with NULL.
#anova for all taxa combined
dat<-data.frame(otu_table(acetobacter_glomed))
samp_d<-data.frame(sample_data(acetobacter_glomed))
merged_dat<-cbind(dat, samp_d)
#check to make sure md and otu table are in same order. All should be true.
table(merged_dat$X == rownames(merged_dat))
TRUE
36
##mod<-aov(OTU_40 ~ Treatment, data = merged_dat)
#summary(mod)
#plot(residuals(mod))
#boxplot(merged_dat$OTU_40 ~ merged_dat$Treatment)
#baruta for individual taxa
aceto_df <- cbind(data.frame(otu_table(acetobacter)), data.frame(sample_data(acetobacter)))
table(aceto_df$X == rownames(aceto_df))
TRUE
36
boxplot(aceto_df$OTU_82 ~ aceto_df$Treatment)
View(tax_table(acetobacter))
#otu 82 ID'ed by baruta. More abundant in ORG. Taxonomy for this taxa "Acetobacteraceae, Bombella"
for(i in 1:length(names(data.frame(otu_table(acetobacter))))){
boxplot(unlist(aceto_df[i]) ~ aceto_df$Treatment)
}
#very zero inflated so we will not use anova.
#for(i in 1:length(names(data.frame(otu_table(acetobacter))))){
# mod<-aov(unlist(aceto_df[i]) ~ aceto_df$Treatment)
# summary(mod)
# print(summary(mod))
# print(names(aceto_df[i]))
#}
for(i in 1:length(names(data.frame(otu_table(acetobacter))))){
mod<-kruskal.test(unlist(aceto_df[i]) ~ aceto_df$Treatment)
print(mod)
print(names(aceto_df[i]))
}
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 1.2428, df = 2, p-value = 0.5372
[1] "OTU_40"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 0.077657, df = 2, p-value = 0.9619
[1] "OTU_77"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 3.4946, df = 2, p-value = 0.1742
[1] "OTU_82"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 2, df = 2, p-value = 0.3679
[1] "OTU_101"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 10.637, df = 2, p-value = 0.004899
[1] "OTU_122"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 2.178, df = 2, p-value = 0.3365
[1] "OTU_152"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 0.26006, df = 2, p-value = 0.8781
[1] "OTU_181"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 6.1258, df = 2, p-value = 0.04675
[1] "OTU_282"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 1.031, df = 2, p-value = 0.5972
[1] "OTU_694"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 2.1165, df = 2, p-value = 0.3471
[1] "OTU_714"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = NaN, df = 2, p-value = NA
[1] "OTU_1129"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 1.031, df = 2, p-value = 0.5972
[1] "OTU_1246"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = NaN, df = 2, p-value = NA
[1] "OTU_1289"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 2, df = 2, p-value = 0.3679
[1] "OTU_1322"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 2, df = 2, p-value = 0.3679
[1] "OTU_1346"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 2, df = 2, p-value = 0.3679
[1] "OTU_1374"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = NaN, df = 2, p-value = NA
[1] "OTU_1446"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = NaN, df = 2, p-value = NA
[1] "OTU_1522"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 2, df = 2, p-value = 0.3679
[1] "OTU_1608"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = NaN, df = 2, p-value = NA
[1] "OTU_1628"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 2, df = 2, p-value = 0.3679
[1] "OTU_1647"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 2, df = 2, p-value = 0.3679
[1] "OTU_1657"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = NaN, df = 2, p-value = NA
[1] "OTU_1702"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = NaN, df = 2, p-value = NA
[1] "OTU_1709"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = NaN, df = 2, p-value = NA
[1] "OTU_1755"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = NaN, df = 2, p-value = NA
[1] "OTU_1756"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = NaN, df = 2, p-value = NA
[1] "OTU_1809"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 2, df = 2, p-value = 0.3679
[1] "OTU_1816"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = NaN, df = 2, p-value = NA
[1] "OTU_1919"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 2, df = 2, p-value = 0.3679
[1] "OTU_1928"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 2, df = 2, p-value = 0.3679
[1] "OTU_2017"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = NaN, df = 2, p-value = NA
[1] "OTU_2037"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = NaN, df = 2, p-value = NA
[1] "OTU_2215"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = NaN, df = 2, p-value = NA
[1] "OTU_2255"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = 2, df = 2, p-value = 0.3679
[1] "OTU_2286"
Kruskal-Wallis rank sum test
data: unlist(aceto_df[i]) by aceto_df$Treatment
Kruskal-Wallis chi-squared = NaN, df = 2, p-value = NA
[1] "OTU_2306"
pairwise.wilcox.test(aceto_df$OTU_82, aceto_df$Treatment)
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Pairwise comparisons using Wilcoxon rank sum test with continuity correction
data: aceto_df$OTU_82 and aceto_df$Treatment
CF CON
CON 0.47 -
ORG 0.79 0.17
P value adjustment method: holm
ggplot(aceto_df, aes(x = Treatment, y = OTU_82, color = Treatment)) + geom_boxplot() + theme_classic() + theme(axis.text = element_text(size = 14), axis.title=element_text(size=14))
#ggsave("/Users/gordoncuster/Desktop/Git_Projects/BeeBread/Writing/Final_Figs/OTU_282_After_Treatment.eps")
repeat analysis with glommed at genus level
acetobacter_glomed_genus<- tax_glom(acetobacter, taxrank = "Genus")
#anova for all taxa combined
dat<-data.frame(otu_table(acetobacter_glomed_genus))
samp_d<-data.frame(sample_data(acetobacter_glomed_genus))
merged_dat<-cbind(dat, samp_d)
#check to make sure md and otu table are in same order. All should be true.
table(merged_dat$X == rownames(merged_dat))
#pull out data
aceto_df <- cbind(data.frame(otu_table(acetobacter_glomed_genus)), data.frame(sample_data(acetobacter_glomed_genus)))
table(aceto_df$X == rownames(aceto_df))
for(i in 1:length(names(data.frame(otu_table(acetobacter_glomed_genus))))){
mod<-kruskal.test(unlist(aceto_df[i]) ~ aceto_df$Treatment)
print(mod)
print(names(aceto_df[i]))
}
for(i in 1:length(names(data.frame(otu_table(acetobacter_glomed_genus))))){
mod<-aov(unlist(aceto_df[i]) ~ aceto_df$Treatment)
summary(mod)
print(summary(mod))
print(names(aceto_df[i]))
}
ggplot(aceto_df, aes(x = Treatment, y = OTU_82, color = Treatment)) + geom_boxplot() + theme_classic() + theme(axis.text = element_text(size = 14), axis.title=element_text(size=14))
tax_table(acetobacter_glomed_genus)
##LACTO
#75 taxa
lactobac <- subset_taxa(bb16S_bac_ML_A, Order =="Lactobacillales")
lactobac_glomed <- tax_glom(lactobac, taxrank = "Order")
dat<-data.frame(otu_table(lactobac_glomed))
samp_d<-data.frame(sample_data(lactobac_glomed))
merged_dat<-cbind(dat, samp_d)
#check to make sure md and otu table are in same order. All should be true.
table(merged_dat$X == rownames(merged_dat))
lacto_df <- cbind(data.frame(otu_table(lactobac)), data.frame(sample_data(lactobac)))
table(lacto_df$X == rownames(lacto_df))
boxplot(lacto_df$OTU_530 ~ lacto_df$Treatment)
View(tax_table(lactobac))
for(i in 1:length(names(data.frame(otu_table(lactobac))))){
boxplot(unlist(lacto_df[i]) ~ lacto_df$Treatment)
}
for(i in 1:length(names(data.frame(otu_table(lactobac))))){
mod<-kruskal.test(unlist(lacto_df[i]) ~ lacto_df$Treatment)
print(mod)
print(names(lacto_df[i]))
}
pairwise.wilcox.test(lacto_df$OTU_568, lacto_df$Treatment)
https://stackoverflow.com/questions/61796484/stacked-bar-chart-with-count-above-and-below-x-axis